package paper.extractor;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * {@link LinkToPaperExtractor} that locates a PDF link on an article page
 * hosted under {@code www.ncbi.nlm.nih.gov}.
 *
 * <p>The article page is requested in its "reader" view (the
 * {@code report=reader} query parameter), and the first {@code a.int-view}
 * anchor whose {@code href} contains {@code ".pdf"} is returned as an
 * absolute URL.
 */
public class PMCExtractor implements LinkToPaperExtractor {
	/** Host prepended to root-relative PDF links found on the page. */
	private static final String RESOURCE_URL = "http://www.ncbi.nlm.nih.gov";
	/** Query string that selects the reader view of an article page. */
	private static final String READER_PARAM = "?report=reader";
	
	// NOTE(review): stored by the constructor but not read by any method in
	// this class; extractLinkToPaper works on its own argument instead.
	private final String URL;
	
	/**
	 * Creates an extractor.
	 *
	 * @param URL value stored on the instance; not consulted by
	 *            {@link #extractLinkToPaper(String)}
	 */
	public PMCExtractor(String URL) {
		this.URL = URL;
	}
	
	/**
	 * Fetches the reader view of {@code articleURL} and returns the first
	 * PDF link found on it.
	 *
	 * @param articleURL URL of the article page; may already carry a query string
	 * @return absolute URL of the PDF, or {@code null} when {@code articleURL}
	 *         is null/empty, the page cannot be fetched, or no matching link exists
	 */
	@Override
	public String extractLinkToPaper(String articleURL) {
		if (articleURL == null || articleURL.isEmpty()) {
			// Nothing to fetch; report "no link" the same way a miss is reported.
			return null;
		}
		
		try {
			Document page = Jsoup.connect(toReaderUrl(articleURL)).userAgent("Mozilla/5.0").get();
			Elements anchors = page.select("a.int-view");
			for (Element anchor : anchors) {
				// jsoup returns "" (never null) for a missing attribute.
				String href = anchor.attr("href");
				if (href.contains(".pdf")) {
					return toAbsoluteLink(anchor, href);
				}
			}
		} catch (IOException e) {
			// Best-effort lookup: a failed fetch is reported and treated as "no link".
			e.printStackTrace();
		}
		
		return null;
	}
	
	/**
	 * Appends the reader-view parameter, using {@code &} instead of {@code ?}
	 * when the URL already contains a query string.
	 */
	private static String toReaderUrl(String articleURL) {
		if (articleURL.indexOf('?') < 0) {
			return articleURL + READER_PARAM;
		}
		return articleURL + "&" + READER_PARAM.substring(1);
	}
	
	/**
	 * Turns the anchor's {@code href} into an absolute URL. Root-relative
	 * links are prefixed with {@link #RESOURCE_URL}; any other form (already
	 * absolute, protocol-relative, document-relative) is resolved against the
	 * page's base URI.
	 */
	private static String toAbsoluteLink(Element anchor, String href) {
		boolean rootRelative = href.startsWith("/") && !href.startsWith("//");
		if (rootRelative) {
			return RESOURCE_URL + href;
		}
		String resolved = anchor.absUrl("href");
		// absUrl yields "" when it cannot resolve; fall back to plain prefixing.
		return resolved.isEmpty() ? RESOURCE_URL + href : resolved;
	}

}
